/*
//
//                  INTEL CORPORATION PROPRIETARY INFORMATION
//     This software is supplied under the terms of a license agreement or
//     nondisclosure agreement with Intel Corporation and may not be copied
//     or disclosed except in accordance with the terms of that agreement.
//          Copyright(c) 2003-2007 Intel Corporation. All Rights Reserved.
//
//     Intel Integrated Performance Primitives AAC Encode Sample for Windows*
//
//  By downloading and installing this sample, you hereby agree that the
//  accompanying Materials are being provided to you under the terms and
//  conditions of the End User License Agreement for the Intel Integrated
//  Performance Primitives product previously accepted by you. Please refer
//  to the file ippEULA.rtf or ippEULA.txt located in the root directory of your Intel IPP
//  product installation for more information.
//
//  MPEG-4 and AAC are international standards promoted by ISO, IEC, ITU, ETSI
//  and other organizations. Implementations of these standards, or the standard
//  enabled platforms may require licenses from various entities, including
//  Intel Corporation.
//
*/

#include "umc_defs.h"

#if defined (UMC_ENABLE_AAC_INT_AUDIO_ENCODER)

#include <math.h>
#include "ippac.h"
#include "ipps.h"
#include "aac_enc_ltp_int.h"
#include "align.h"

/********************************************************************/

/*
 * Quantized LTP gain table, indexed by the 3-bit value returned through
 * ltpInd.  aaciencLtpEncode() multiplies an entry by a 32-bit quantity in
 * 64-bit arithmetic and shifts the product right by 30, i.e. the entries are
 * treated as Q30 fixed point (0x2488765c ~= 0.5708 ... 0x57a66dbd ~= 1.3695).
 * The table is read-only, hence const.
 */
static const Ipp32s g_ltp_coef[] = {
  0x2488765c, 0x2c955b46, 0x340841ee, 0x3a52ce03,
  0x3f089a02, 0x44586013, 0x4c7457c1, 0x57a66dbd
};

/********************************************************************/

/*
 * aaciencLtpEncode
 *
 * Searches the LTP history for the lag whose normalized cross-correlation
 * with the input block is largest, quantizes the corresponding gain to one
 * of the eight g_ltp_coef entries and writes the predicted time signal.
 *
 *   inBuf         in   4096 Ipp32s values fed to the forward FFT; per the
 *                      original note the caller keeps inBuf[2048..4095] at 0.
 *   ltpBuf        in   16-bit LTP history; samples 0..3071 are read.
 *   predictedBuf  out  2048 predicted samples (zero padded past the lag
 *                      window).
 *   ltpDelay      out  selected lag in 0..2047, or -1 when the correlation at
 *                      the selected lag is negative.  In the -1 case neither
 *                      *ltpInd nor predictedBuf is written.
 *   ltpInd        out  index 0..7 into g_ltp_coef.
 *   corrFft       in   FFT specification handed to the IPP FFT calls
 *                      (NOTE(review): presumably initialized for order 12,
 *                      i.e. 4096 points - confirm against the caller).
 *   corrBuff      in   work buffer handed to the IPP FFT calls.
 */
void aaciencLtpEncode(Ipp32s *inBuf,
                      Ipp16s *ltpBuf,
                      Ipp32s *predictedBuf,
                      Ipp32s *ltpDelay,
                      Ipp32s *ltpInd,
                      IppsFFTSpec_R_32s* corrFft,
                      Ipp8u* corrBuff)
{
  __ALIGN Ipp32s tmp1[4096];
  __ALIGN Ipp32s corr[4096];
  __ALIGN Ipp32s ltpBuf32s[4096];
  Ipp32sc *s, *d;
  Ipp32sc res;
  Ipp32s saveSqCorr, en, saveEn, tmpCorr;
  Ipp32s real_coef;
  Ipp32s lag_size, start, delay, ind;
  Ipp32s i;
  Ipp32s scalef, shift, min, max;

  /* Note: inBuf[2048 : 4095] = 0 */

  /*
   * Widen the history (shifted by one sample) to 32 bits.  Only samples
   * ltpBuf[1..3071] are read: the history maintained by
   * aaciencLtpBufferUpdate() ends at index 3071, so index 3072 must not be
   * touched and is treated as zero like the rest of the padding.
   * Multiplication is used instead of "<< 16" because left-shifting a
   * negative value is undefined in C.
   */
  for (i = 0; i < 3071; i++) {
    ltpBuf32s[i] = (Ipp32s)ltpBuf[i + 1] * 65536;
  }

  ippsZero_32s(ltpBuf32s + 3071, 1025);

  ippsFFTFwd_RToPack_32s_Sfs(inBuf, tmp1, corrFft, 12, corrBuff);
  ippsFFTFwd_RToPack_32s_Sfs(ltpBuf32s, corr, corrFft, 12, corrBuff);

  /*
   * scalef = 32 - (headroom bits of both spectra): the right shift applied
   * to every 64-bit product below so that it fits into 32 bits.
   */
  ippsMinMax_32s(tmp1, 4096, &min, &max);

  min = -min;
  if (min > max) {
    max = min;
  }

  scalef = 32;

  if (max != 0) {
    while (max <= 0x3FFFFFFF) {
      max *= 2;
      scalef--;
    }
  }

  ippsMinMax_32s(corr, 4096, &min, &max);

  min = -min;
  if (min > max) {
    max = min;
  }

  if (max != 0) {
    while (max <= 0x3FFFFFFF) {
      max *= 2;
      scalef--;
    }
  }

  if (scalef < 0) scalef = 0;

  /* Pack format: element 0 and element 4095 are purely real bins. */
  tmp1[0] = (Ipp32s)(((Ipp64s)tmp1[0] * corr[0]) >> scalef);
  tmp1[4095] = (Ipp32s)(((Ipp64s)tmp1[4095] * corr[4095]) >> scalef);

  s = (Ipp32sc*)(corr + 1);
  d = (Ipp32sc*)(tmp1 + 1);

  /* Remaining 2047 complex bins: d = s * conj(d), scaled by 2^-scalef. */
  for (i = 0; i < 2047; i++) {
    res.re = (Ipp32s)(((Ipp64s)s[i].re * d[i].re + (Ipp64s)s[i].im * d[i].im) >> scalef);
    res.im = (Ipp32s)(((Ipp64s)s[i].im * d[i].re - (Ipp64s)s[i].re * d[i].im) >> scalef);
    d[i] = res;
  }

  ippsFFTInv_PackToR_32s_Sfs(tmp1, corr, corrFft, 0, corrBuff);

  /*
   * Scale the 2048 correlation values below 32768 in magnitude so that
   * their squares fit into 32 bits.
   */
  ippsMinMax_32s(corr, 2048, &min, &max);

  min = -min;
  if (min > max) {
    max = min;
  }

  shift = 0;

  if (max != 0) {
    while (max >= 32768) {
      max >>= 1;
      shift++;
    }
  }

  ippsRShiftC_32s_I(shift, corr, 2048);

  /* Energy of the lag-0 window ltpBuf[2048..3071], scaled by 2^-11. */
  ippsDotProd_16s32s_Sfs(ltpBuf + 2048, ltpBuf + 2048, 1024, &en, 11);

  saveSqCorr = corr[2047] * corr[2047];
  delay = 0;
  saveEn = en;
  if (corr[2047] < 0)
    saveSqCorr = -saveSqCorr;

  /*
   * Lag search.  corr[2047 - i] is the correlation for lag i and "en" is
   * the running energy of the matching history window: each step the
   * window gains sample 2048 - i and, once it has reached its full length
   * (i > 1024), loses sample 4096 - i.  A candidate wins when
   * corr^2 / en exceeds the best ratio so far (compared by
   * cross-multiplication in 64 bits); negative correlations never win.
   */
  for (i = 1; i < 2048; i++) {
    en += (Ipp32s)(((Ipp32s)ltpBuf[2048 - i] * (ltpBuf[2048 - i] >> 1)) >> 10);
    if (i > 1024) {
      en -= (Ipp32s)(((Ipp32s)ltpBuf[4096 - i] * (ltpBuf[4096 - i] >> 1)) >> 10);
    }

    if (corr[2047 - i] >= 0) {
      tmpCorr = corr[2047 - i] * corr[2047 - i];
      if ((Ipp64s)saveSqCorr * en < (Ipp64s)tmpCorr * saveEn) {
        saveSqCorr = tmpCorr;
        delay = i;
        saveEn = en;
      }
    }
  }

  tmpCorr = corr[2047 - delay];

  ind = 0;

  if (tmpCorr < 0) {
    /* No usable prediction: report it and leave the other outputs alone. */
    *ltpDelay = -1;
    return;
  } else if (saveEn >= 0) {
    /*
     * Bring the correlation to the scale of saveEn.
     * NOTE(review): the constant 19 presumably folds together the fixed
     * FFT/energy scalings - confirm against the fixed-point design notes.
     */
    shift = scalef+shift-19;
    if (shift <= 0) {
      tmpCorr >>= (-shift);
    } else {
      if (tmpCorr >= (1 << (31 - shift))) {
        /* Would overflow when shifted: pick the largest gain directly. */
        ind = 7;
      } else {
        tmpCorr <<= shift;
      }
    }

    if (ind == 0) {
      /* First table entry whose gain * energy reaches the correlation. */
      for (i = 0; i < 8; i++) {
        ind = i;
        if (tmpCorr <= (Ipp32s)(((Ipp64s)g_ltp_coef[i] * saveEn) >> 30))
          break;
      }

      /* Step back one entry when the lower neighbour is strictly closer. */
      if (ind > 0) {
        Ipp32s distLo = tmpCorr - (Ipp32s)(((Ipp64s)g_ltp_coef[ind - 1] * saveEn) >> 30);
        Ipp32s distHi = tmpCorr - (Ipp32s)(((Ipp64s)g_ltp_coef[ind] * saveEn) >> 30);
        if ((Ipp64s)distLo * distLo < (Ipp64s)distHi * distHi) {
          ind -= 1;
        }
      }
    }
  }

  real_coef = g_ltp_coef[ind];
  lag_size = 2048;
  start = 2048 - delay;

  /* For short lags the history ends at index 3071, so the window shrinks. */
  if (delay < 1024)
    lag_size = 1024 + delay;

  /* Q30 gain times 16-bit sample, rounded and shifted right by 16. */
  for (i = 0; i < lag_size; i++) {
    predictedBuf[i] = (Ipp32s)((real_coef * (Ipp64s)ltpBuf[i + start] + 32768) >> 16);
  }
  for (; i < 2048; i++)
    predictedBuf[i] = 0;

  *ltpDelay = delay;
  *ltpInd = ind;
}

/****************************************************************************/

/*
 * aaciencLtpBufferUpdate
 *
 * Rebuilds the time signal from the quantized spectrum of each channel
 * (dequantization, optional M/S reconstruction, optional addition of the
 * predicted spectrum, inverse MDCT) and pushes it into the 16-bit LTP
 * history.
 *
 *   ltpBuffer          in/out  per-channel history; elements 0..3071 are
 *                              accessed.  [1024..2047] is moved to [0..1023],
 *                              the new block goes to [1024..2047] and the
 *                              overlap-add buffer to [2048..3071].
 *   prevSamples        in/out  per-channel overlap-add buffer passed to
 *                              ippsMDCTInv_AAC_32s_I.
 *   predictedSpectrum  in      per-channel predicted spectrum, added for
 *                              non-short windows when predictor_data_present
 *                              is set.
 *   pElement           in      channel pair element (both channel streams and
 *                              ms_mask_present are read).
 *   sfb_offset_short   in      scale factor band boundaries of one short
 *                              window (entries 0..max_sfb are read).
 *   prevWindowShape    in      per-channel previous window shape for the
 *                              inverse MDCT.
 *   numCh              in      number of channels to process (at most 2, the
 *                              size of the local per-channel arrays).
 */
void aaciencLtpBufferUpdate(Ipp16s **ltpBuffer,
                            Ipp32s **prevSamples,
                            Ipp32s **predictedSpectrum,
                            sEnc_channel_pair_element *pElement,
                            Ipp32s *sfb_offset_short,
                            Ipp32s *prevWindowShape,
                            Ipp32s numCh)
{
  __ALIGN Ipp32s spectrum[2][1024];
  __ALIGN Ipp32s spectrum_i[1024];
  sEnc_individual_channel_stream *pStream[2];
  Ipp32s         *pSpectrumData;
  Ipp32s         i, ch;

  pStream[0] = pElement->p_individual_channel_stream_0;
  pStream[1] = pElement->p_individual_channel_stream_1;

  /*
   * Start from silence so that lines the dequantizer does not write (a
   * channel with max_line <= 0, or lines used by the M/S loop below) read as
   * zero instead of indeterminate stack contents.
   */
  for (ch = 0; ch < 2; ch++) {
    ippsZero_32s(spectrum[ch], 1024);
  }

  /* Dequantize each channel. */
  for (ch = 0; ch < numCh; ch++) {
    Ipp32s  max_sfb = pStream[ch]->max_sfb;
    Ipp32s  max_line = pStream[ch]->max_line;

    if (max_line > 0) {
      if (pStream[ch]->windows_sequence != EIGHT_SHORT_SEQUENCE) {
        ippsPow43Scale_16s32s_Sf(pStream[ch]->x_quant, spectrum[ch],
                                 pStream[ch]->scale_factors,
                                 pStream[ch]->sfb_offset,
                                 SF_OFFSET, max_sfb, 1, -2);
      } else {
        /*
         * Short windows: one call per window group, each using the next
         * max_sfb scale factors and band offsets.
         */
        Ipp16s *scale_factors = pStream[ch]->scale_factors;
        Ipp32s *sfb_offset = pStream[ch]->sfb_offset;
        Ipp32s num_window_groups = pStream[ch]->num_window_groups;
        Ipp32s g;

        for (g = 0; g < num_window_groups; g++) {
          ippsPow43Scale_16s32s_Sf(pStream[ch]->x_quant,
                                   spectrum[ch], scale_factors,
                                   sfb_offset, SF_OFFSET, max_sfb, 1, -5);

          scale_factors += max_sfb;
          sfb_offset += max_sfb;
        }
      }
    }
  }

  /* ms_mask_present == 2: rebuild both channels as sum and difference. */
  if (pElement->ms_mask_present == 2){
    Ipp32s *ptrL = spectrum[0];
    Ipp32s *ptrR = spectrum[1];
    Ipp32s li, ri;
    Ipp32s k;

    for (k = 0; k < pStream[0]->max_line; k++) {
      li = (ptrL[k] + ptrR[k]);
      ri = (ptrL[k] - ptrR[k]);
      ptrL[k] = li;
      ptrR[k] = ri;
    }
  }

  for (ch = 0; ch < numCh; ch++) {
    Ipp32s  max_sfb = pStream[ch]->max_sfb;
    Ipp32s  max_line = pStream[ch]->max_line;

    if (pStream[ch]->windows_sequence != EIGHT_SHORT_SEQUENCE) {
      if (pStream[ch]->predictor_data_present) {
        ippsAdd_32s_ISfs(predictedSpectrum[ch], spectrum[ch], max_line, 0);
      }
      ippsZero_32s(spectrum[ch] + max_line, 1024 - max_line);
      pSpectrumData = spectrum[ch];
    } else {

      /*
       * Ipp32s block deinterleave: the grouped layout (per group, per band,
       * per window) is rearranged into eight consecutive 128-line windows,
       * and lines above the last coded band are cleared.
       */
      Ipp32s *len_window_group = pStream[ch]->len_window_group;
      Ipp32s num_window_groups = pStream[ch]->num_window_groups;
      Ipp32s *ptrIn = spectrum[ch];
      Ipp32s *ptrOut = spectrum_i;
      Ipp32s sfb, w, j, g;

      for (g = 0; g < num_window_groups; g++) {
        for (sfb = 0; sfb < max_sfb; sfb++) {
          Ipp32s sfb_start = sfb_offset_short[sfb];
          Ipp32s sfb_end = sfb_offset_short[sfb+1];
          Ipp32s sfb_width = sfb_end - sfb_start;

          for (j = 0; j < len_window_group[g]; j++) {
            for (w = 0; w < sfb_width; w++) {
              ptrOut[w + sfb_start + 128 * j] = *ptrIn;
              ptrIn++;
            }
          }
        }

        for (j = 0; j < len_window_group[g]; j++) {
          for (w = sfb_offset_short[max_sfb]; w < 128; w++) {
            ptrOut[w + 128 * j] = 0;
          }
        }
        ptrOut += 128 * len_window_group[g];
      }
      pSpectrumData = spectrum_i;
    }

    /*
     * In-place inverse MDCT: on return pSpectrumData holds the output of the
     * transform and prevSamples[ch] the updated overlap-add buffer.
     */
    ippsMDCTInv_AAC_32s_I(pSpectrumData, prevSamples[ch],
                          pStream[ch]->windows_sequence,
                          pStream[ch]->window_shape,
                          prevWindowShape[ch], 1024);

    /* Age the history by one block. */
    for (i = 0; i < 1024; i++) {
      ltpBuffer[ch][i] = ltpBuffer[ch][i + 1024];
    }

    /*
     * Store the freshly reconstructed block (the in-place MDCT result, not a
     * separate scratch array) followed by the overlap-add buffer, both
     * scaled down by 2^7 to 16 bits.
     */
    ippsConvert_32s16s_Sfs(pSpectrumData,   ltpBuffer[ch] + 1024, 1024, 7);
    ippsConvert_32s16s_Sfs(prevSamples[ch], ltpBuffer[ch] + 2048, 1024, 7);
  }
}

#endif //UMC_ENABLE_AAC_INT_AUDIO_ENCODER

